import urllib.request
from lxml import etree
import random


# JD list page to fetch (query string: cat=1713,3261,3361, page=1).
url = 'https://list.jd.com/list.html?cat=1713,3261,3361&page=1'

# Desktop browser User-Agent strings; the request code picks one at random
# so that successive runs do not always present the same client.
ua = [
    # Firefox 52
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
    # Chrome 59
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    # Edge 14
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    # Google Chrome 69
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    # QQ Browser 10.1
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5514.400 QQBrowser/10.1.1614.400",
    # Internet Explorer 11
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
]

# Build the request and present a randomly chosen browser User-Agent.
req = urllib.request.Request(url)
req.add_header("User-Agent", random.choice(ua))
# Use the response as a context manager so the underlying connection is
# closed as soon as the body has been read, rather than being left open.
with urllib.request.urlopen(req) as response:
    # The "ignore" handler drops undecodable bytes instead of raising
    # UnicodeDecodeError.
    data = response.read().decode("utf-8", "ignore")
# Debug output: size and type of the decoded page.
print(len(data))
print(type(data))
# Parse the markup with lxml's HTML parser for the XPath queries that follow.
treedata = etree.HTML(data)
# Debug output: len() of an lxml element is its number of direct children.
print(len(treedata))


# Select every product <li> inside the listing container.  The class test
# matches "gl-item" as a whitespace-separated token instead of requiring the
# whole attribute to equal it, so items that carry extra class names are kept.
# NOTE(review): known issue from an earlier run -- one book, the 34th
# (《爱因斯坦自述》), was missing from the output and the cause was never
# identified.  Exact class matching is one possible cause; confirm against
# the live page.
li_list = treedata.xpath(
    '//div[@id="plist"]/ul[@class="gl-warp clearfix"]'
    '//li[contains(concat(" ", normalize-space(@class), " "), " gl-item ")]'
)
print(li_list)

for book_no, li in enumerate(li_list, start=1):
    # Get the book title.  The leading "." keeps the query relative to this
    # <li>; an absolute "//" path would search the whole document on every
    # iteration and return the titles of all items.
    bookname = li.xpath('.//div[@class="p-name"]/a/em/text()')
    print("第" + str(book_no) + "本书")
    # An item may yield no text node; print a placeholder instead of letting
    # bookname[0] raise IndexError and abort the remaining items.
    print(bookname[0] if bookname else "(未找到书名)")
    print("-----------")


